*****************************************************************
* Replication directory for                                   ***
* Prime locations                                             ***
* by Gabriel M. Ahlfeldt, Thilo N.H. Albers, Kristian Behrens ***
* Published in American Economic Review: Insights             ***
*****************************************************************
* 01/2025
* Stata
version 17.0

* This do file generates outcomes that summarize the geography of prime locations in Global cities

* Load the metro list and collect its distinct metro identifiers.
* The local METROIDS drives the per-metro loop that counts prime locations below.
* (The former locals Nmetro and count were never used and have been removed.)
	qui import delimited "$data_125cities/METRO_LEVEL_COVARIATES/metrolist125.csv", clear
	levelsof  metro_id, local(METROIDS)

* Load data
	u "$temp/125CitiesPLs/grid125_PL_output.dta", clear
	
* Count PLs
* For every metro listed in METROIDS, PL_count receives the number of distinct
* non-missing PLID values found in that metro (r(r) = number of rows of the
* one-way table). Metros that are not in METROIDS keep PL_count missing.
	gen PL_count = . 
		foreach num of local METROIDS { // loop over metro
			qui tab PLID if metro_id == `num'
			qui replace PL_count = r(r) if metro_id == `num'
			display "metro `num' done"
		}
		
* Work with the final variable name from here on
	rename empl employment

* Cells with missing employment do not belong to the final city grid
	drop if employment == .

* Keep developable cells, plus any cell that hosts employment: plant-level data
* are exactly geocoded, so a seemingly undevelopable cell may be partially developable.
	keep if developable == 1 | employment > 0

* Cell area
	generate grid_area = (0.25*$bmd)^2
	label variable grid_area "Area in sq. km"

* Focus on PL to compute ranks
* The grid-level data are preserved, collapsed to one row per prime location,
* ranked within metro, saved, and then restored.
	preserve
	keep if PL == 1
	collapse (sum)  grid_area  employment, by(PLID  metro_id PLID_Global)

	* Rank within metro by employment: field ranking, 1 = largest, ties share the better rank
	egen PLrankByCity = rank(employment), by(metro_id) field

	* Number of prime locations per metro = number of collapsed rows in the metro.
	* (Taking the maximum field rank undercounts whenever the smallest PLs tie,
	*  e.g. ranks 1,2,3,3 have maximum 3 although there are 4 PLs.)
	bysort metro_id: gen Number_PL = _N
 
* Save rank for later use
		keep PLID metro_id PLrankByCity Number_PL PLID_Global
		save "$temp/PLID125_metro_rank", replace // 
		restore
	
	* Metro-wide totals, repeated on every cell of the metro:
	* employment (from cell_total_emp) and area (from grid_area)
		egen metro_emp = total(cell_total_emp), by(metro_id)
		egen metro_area = total(grid_area), by(metro_id)
* Store the grid-level file that the PL-level and metro-level sections start from
	save "$temp/125CitiesPLs/grid125_PL_output_Rank.dta", replace
	
		
* Generate PL-level data set ***************************************************
* One row per prime location (PLID_Global) with its within-city rank, its area,
* and its shares of metro employment and metro area.
	u "$temp/125CitiesPLs/grid125_PL_output_Rank.dta"	, clear
	
	* Focus on prime locations and merge data 
		keep if PL == 1
		* Within-city rank saved earlier in this script
		merge m:1 PLID_Global using "$temp/PLID125_metro_rank", keepusing(PLrankByCity)
			keep if _merge == 3
			drop _merge
		* City name
		merge m:1 metro_id using "$data_125cities/METRO_LEVEL_COVARIATES/metro_data.dta", keepusing(metro)
			keep if _merge == 3
			drop _merge 
		
	* compute shares at metro
	* Rank and metro totals are carried through the collapse as means;
	* PL employment and PL area are summed over the cells of the PL.
		collapse (first) metro metro_id (mean) PLrankByCity metro_emp metro_area (sum) PL_emp = cell_total_emp PL_area = grid_area, by(PLID_Global)
		ge PL_TS_emp_pct = PL_emp / metro_emp *100
		ge PL_area_pct = PL_area / metro_area*100
		drop PL_emp  metro_area metro_emp
	* Label variables
	* After the collapse the identifier is PLID_Global; it is named in full
	* instead of relying on variable abbreviation of PLID.
		label var PLID_Global "Global prime locations identifier"
		label var metro "Global city name"
		label var metro_id "Global city identifier"
		label var PLrankByCity "Prime location rank within city"
		label var PL_TS_emp_pct "Prime location share of city employment (%)"
		label var PL_area_pct "Prime location share of city developable area (%)"
		label var PL_area "Prime location area (sq. km)"

	* Save data set for toolkit
	* NOTE(review): the folder name GobalCities looks like a typo of GlobalCities;
	* left unchanged because other scripts may read from this path.
		capture mkdir "$dataoutput/GobalCities"
		save "$dataoutput/GobalCities/PL-data.dta", replace	
		export delimited using "$dataoutput/GobalCities/PL-data.csv", replace	
		
		
* Generate metro-level data set ************************************************
* Starts again from the grid-level file and reduces it to one row per metro with
* the joint employment and area shares of all prime locations and their number.
	u "$temp/125CitiesPLs/grid125_PL_output_Rank.dta"	, clear

* Share PLs at total employment; notice that now it will be shares of all PLs jointly at metro totals
* NOTE(review): PL_emp and PL_area are not created on the grid-level data anywhere
* in this script, so the two drops assume both variables already exist in the
* loaded file - confirm.
	drop PL_emp
* NOTE(review): no variable called emp is created in this script. It is either an
* existing variable in the loaded file or resolves by abbreviation to employment.
* metro_emp (the denominator below) is built from cell_total_emp, so numerator and
* denominator may be different employment measures - confirm.
* The if-condition leaves PL_emp and PL_area missing on cells outside prime locations.
	egen PL_emp = sum(emp) if PL == 1, by(metro_id)
	drop PL_area
	egen PL_area = sum(grid_area) if PL == 1, by(metro_id)
	ge PL_emp_share = PL_emp / metro_emp
	ge PL_area_share = PL_area / metro_area
	
* Number of PLs
* Keep the metro-level variables, drop rows without a PL share (cells outside
* prime locations), and remove duplicate rows.
	keep metro_emp PL_emp_share metro_id PL_emp PL_area PL_area_share
	drop if PL_emp_share == . 
	duplicates drop
* The rank file has one row per prime location, hence the 1:m merge; the second
* duplicates drop removes the repeated rows again. The keep also removes _merge.
	merge 1:m metro_id using "$temp/PLID125_metro_rank", keepusing(Number_PL)
	keep PL_emp_share metro_id Number_PL metro_emp PL_area PL_area_share
	duplicates drop
	
* Attach the metro-level covariates (all variables of metro_data); no merge indicator is kept
	merge 1:1 metro_id using   "$data_125cities/METRO_LEVEL_COVARIATES/metro_data", nogenerate
	
* Shares expressed in percent
	generate PL_area_pcshare = PL_area_share *100	
	generate PL_emp_pcshare = PL_emp_share *100	

* Variable labels
	label variable Number_PL "Number of prime locations"
	label variable PL_area "Prime location area (sq. km)"
	label variable PL_area_pcshare "PL share of area (%)"
	label variable PL_emp_pcshare "PL share of TS employment (%)"

* Inspect the number of prime locations and list the stored results
	summarize Number_PL
	return list
	
* Generate histograms
* Four panels, each with a vertical line at the sample mean of the plotted variable.

	* Panel 1: number of prime locations (one bar per integer, labelled up to the maximum)
	summarize Number_PL 
	local mean_num = r(mean)	
	local max_num = r(max)
	histogram Number_PL, start(0.5) width(1) xlabel(1(1)`max_num') color(blue%50) graphregion(color(white)) xline(`mean_num')  name(num, replace)

	* Panel 2: prime location area
	summarize PL_area 
	local mean_areakm = r(mean)	
	histogram PL_area, start(0) xlabel(0(1)9) width(0.5) color(blue%50) graphregion(color(white)) xline(`mean_areakm') name(areakm, replace)

	* Panel 3: prime location share of area (%)
	summarize PL_area_pcshare 
	local mean_area = r(mean)
	histogram PL_area_pcshare, start(0) width(0.05) xlabel(0(0.1)0.7) color(blue%50) graphregion(color(white)) xline(`mean_area') name(area, replace)

	* Panel 4: prime location share of employment (%)
	summarize PL_emp_pcshare 
	local mean_emp = r(mean)	
	histogram PL_emp_pcshare, start(0) width(5)  xlabel(0(10)80) color(blue%50) graphregion(color(white)) xline(`mean_emp') name(emp, replace)

	* Arrange the four panels in a 2-column grid
	graph combine num areakm area emp, cols(2) graphregion(color(white)) xsize(10) ysize(5)

* Save Appendix Figure B.3.2
	capture mkdir "$figures_App/GlobalCities"
	graph export "$figures_App/GlobalCities/FIG_B3_2_distribution.pdf", replace

* Add country information; rows that exist only in the country file are not kept
	merge 1:1 metro_id using "$data_125cities/METRO_LEVEL_COVARIATES/metro_country.dta", keep(master match) nogenerate
	
* Store the metro-level file used by the table and footnote sections below
	save "$temp/metro_using.dta", replace
	
* Output table with characteristics
* Builds the metro-level table: number of prime locations, their joint shares of
* employment and area, a geography type per city, and two appended rows holding
* the means for North America and for the rest of the world.
	u "$temp/metro_using.dta", clear
	gen PL_count = Number_PL
	gen PL_TS_emp_pct = PL_emp_share*100
	gen PL_area_pct = PL_area_share*100
	gen NA = world_region == "North America"
	gen metro_name = metro
	* Classify
	* Stata treats missing values as larger than any number, so every "greater than"
	* comparison is guarded with !missing(); otherwise a metro without data would
	* be classified as Polycentric and Agglomerated.
		gen Type1 = ""
		gen Type2 = ""
		gen Type = ""
		replace Type1 = "Monocentric" if PL_count == 1
		replace Type1 = "Duocentric" if PL_count == 2
		replace Type1 = "Polycentric" if PL_count > 2 & !missing(PL_count)
		sum PL_TS_emp_pct,d
		local medianTSshare = r(p50)
		replace Type2 = "Agglomerated" if PL_TS_emp_pct >= `medianTSshare' & !missing(PL_TS_emp_pct)
		* NOTE(review): "Dispearsed" is presumably a typo of "Dispersed"; the value is
		* left unchanged because it is written to the exported data set.
		replace Type2 = "Dispearsed" if PL_TS_emp_pct < `medianTSshare' 
		* Type stays empty when either component could not be determined
		replace Type = Type1+"-"+Type2 if Type1 != "" & Type2 != ""
		tab Type
	* Sort by employment
		gsort -metro_emp
		
	* Totals
	* Two empty rows are appended; NA is missing on them, so they never enter the
	* group means computed below.
		local obs = _N+2
		set obs `obs'
		foreach var of varlist PL_area_pct   PL_TS_emp_pct {
			sum `var'  if NA == 1
			replace `var' = r(mean) if _n == _N-1
			sum `var' if NA == 0
			replace `var' = r(mean) if _n == _N
		}
		replace metro_name = "Mean, North America" if  _n == _N-1
		replace metro_name = "Mean, rest of world" if  _n == _N
		
	* Save file
		keep metro_id metro_name PL_count PL_area_pct  PL_TS_emp_pct Type	
	* Label variables
		label var metro_id "Global city identifier"
		label var PL_count "Number of prime locations"
		label var PL_TS_emp_pct "Share of prime locations at tradable services employment (%)"
		label var PL_area_pct "Share of prime locations at city area"
		label var Type "Prime location geography type"
		label var metro_name "Global city"
	* Save prime locations data set
	* NOTE(review): the folder name GobalCities looks like a typo of GlobalCities;
	* left unchanged because other scripts may read from this path.
		capture mkdir "$dataoutput/GobalCities"
		save "$dataoutput/GobalCities/metro-data.dta", replace	
		export delimited using "$dataoutput/GobalCities/metro-data.csv", replace	
	
* Generate text for Table 2 notes
* Produces one string of the form "Region: City (n), City (n), ..." covering all
* cities, grouped by world region, and exports it as a one-column CSV.
	u "$temp/metro_using.dta", clear
	* Variables are named in full (world_region, metro1) instead of relying on abbreviation
	replace world_region = "Africa and Middle East" if world_region == "Africa" | world_region == "Middle East"
	sort world_region metro
	* Keep the part of the metro name before the first comma
	split metro, p(,)
	keep metro1 world_region Number_PL
	* NOTE(review): position-based override of the city name in row 125 after the
	* sort above; it silently hits a different city if the sort order or the number
	* of rows changes - confirm.
	replace metro1 = "Sao Paulo" if _n == 125
	tostring Number_PL, gen(NPL)
	gen city = metro1+" (" +NPL +"), "
	* Prefix the region name to the first city of every region
	gen first = world_region != world_region[_n-1]
	replace world_region = "" if first == 0
	replace world_region = world_region+ ": " if first == 1
	replace city = world_region +city
	keep city
	* Concatenate all rows into the first observation. The strings are appended
	* directly (not through a local macro), so names containing quotes or macro
	* characters cannot break the command, and the loop covers every row rather
	* than a hard-coded 125.
	gen text = ""
	forvalues i = 1/`=_N' {
		replace text = text + city[`i'] if _n == 1 
	}
	keep text
	export delimited "$dataoutput/Footnote.csv", replace 
	* Script ends
